Objective: Summary of the demographics and soil characteristics of the Rwanda long term soil health study.
Add in details and links on study methodology here.
library(knitr)
library(ggplot2)
library(stringr)
suppressMessages(library(dplyr))
library(sp)
suppressMessages(library(dismo))
suppressMessages(library(stargazer))
library(leaflet)
library(XML)
suppressMessages(library(maptools))
library(automap)
wd <- "/Users/mlowes/drive/soil health study/data/rw baseline"
dd <- paste(wd, "data", sep = "/")
od <- paste(wd, "output", sep="/")
md <- paste(wd, "maps", sep="/")
drive <- "~/drive/r_help/4_output/statistical_test_outputs"
#load data:
# This data is being drawn from the Soil lab repository. It has the baseline data with it
d <- read.csv(paste(dd, "Rwanda_shs_commcare_soil data_final.csv", sep="/"), stringsAsFactors=FALSE)
I’m using the merged data from Emmanuel for now but I will need to go back and download the demographic data fresh from Commcare and replicate the merge process using “Identifiers with SSN_final” provided by Emmanuel.
Included code to pair demographic and soil data?: Not yet
Now let’s start cleaning the demographic variables
# take out weird CommCare stuff
d[d=="---"] <- NA
# take out demographic and text_final_question from variable names
#to.change <- c("text_final_questions.", "intro_champ_echantillon.",
# "demographic_info.", "other_inputs_", "crop1_15b_inputs.", "crop2_15b_inputs.",
# "^15b.", "historical_intro.")
#names(d) <- tapply(to.change, function(x) { gsub(x, "", names(d))})
names(d) <- gsub("text_fil_questions", "", names(d))
names(d) <- gsub("intro_champ_echantillon", "", names(d))
names(d) <- gsub("demographic_info", "", names(d))
names(d) <- gsub("other_inputs_", "", names(d))
names(d) <- gsub("crop1_15b_inputs", "", names(d))
names(d) <- gsub("crop2_15b_inputs", "", names(d))
names(d) <- gsub("^15b", "", names(d))
names(d) <- gsub("historical_intro", "", names(d))
names(d)[names(d)=="field_dim"] <- "field_dim1"
names(d)[names(d)=="v51"] <- "field_dim2"
Take care of demographic data formatting issues
# deal with names and drop unnecessary variables
d <- d %>%
dplyr::select(-c(rownumber, infoformid, introductiond_accept, photo,
infocompleted_time,
enumerator_me, contains("phone"), farmer_me, farmersurme, farmerme,
d_respondent, additiolsamplepackedandsenttol, additiolsamplerequestedfromlab,
datedryingcompleteifnecessary, driedindistrictifnecessary, senttohqyo,
collectedindistrictyo, excessstoredathq_, receivedathq_,dateofinitialdryingifnecessary,
samplecollectedinfieldyo, field_des, samplewetordry)) %>%
rename(
female = sex,
age = age_cultivateur,
own = d_own,
client = d_client) %>%
mutate(
female= ifelse(female=="gore", 1,0),
field.size = field_dim1*field_dim2
)
d$total.seasons <- apply(d[, grep("d_season_list", names(d))], 1, function(x) {
sum(x, na.rm=T)})
Fix some more variable names:
names(d)[names(d)=="field_kg_fert1_1"] <- "field_kg_fert1_15b"
names(d)[names(d)=="field_kg_fert2_1"] <- "field_kg_fert2_15b"
names(d)[names(d)=="field_kg_compost"] <- "field_kg_compost_15b"
Recode variables to numeric:
# recode to numeric
varlist <- c("client", "own", "crop1_15b_seedkg", "crop1_15b_yield", "crop1_15b_yield_",
"crop2_15b_seedkg", "crop2_15b_yield", "crop2_15b_yield_", "field_kg_fert1_15b",
"field_kg_fert2_15b", "field_kg_compost_15b", "d_lime_15b", "kg_lime_15b")
# check that there aren't values hidden in the character variables
#apply(d[,varlist], 2, function(x){table(x, useNA='ifany')})
# recode characters to numerics
d[, varlist] <- sapply(d[,varlist], as.numeric)
# divide out GPS coordinates
# http://rfunction.com/archives/1499
# replace the blank gps_pic_guide with info
d <- cbind(d, str_split_fixed(d$gps_pic_guid, " ", n=4))
names(d)[106:109] <- c("lat", "lon", "alt", "precision")
d[,c("lat", "lon", "alt", "precision")] <- sapply(d[,c("lat", "lon", "alt", "precision")],
function(x){as.numeric(as.character(x))})
Check out missing data in the district variable:
#table(d$district, useNA = 'ifany')
length(d[d$district=="",])
## [1] 109
# these are the sample for which we have soil data but not survey data drop these for now
d <- d[-which(d$district==""),]
Cleaning of soil data: Come back, check and clean the soil data before outputting to clean data set. Plot each of the soil variables to look for unrealistic values.
dim(d[is.na(d$m3.Ca),])
## [1] 11 109
d <- d[-which(is.na(d$m3.Ca)),]
summary(d[,c("m3.Ca", "m3.Mg", "pH", "Total.N", "Total.C")])
## m3.Ca m3.Mg pH Total.N
## Min. : 220.5 Min. : 38.26 Min. :4.547 Min. :0.05523
## 1st Qu.: 443.0 1st Qu.: 114.16 1st Qu.:5.021 1st Qu.:0.13596
## Median : 720.2 Median : 184.98 Median :5.539 Median :0.15552
## Mean : 876.4 Mean : 207.82 Mean :5.544 Mean :0.15717
## 3rd Qu.:1157.9 3rd Qu.: 270.26 3rd Qu.:6.009 3rd Qu.:0.17886
## Max. :3669.2 Max. :1008.93 Max. :7.211 Max. :0.24701
## Total.C
## Min. :0.8988
## 1st Qu.:1.7227
## Median :2.1118
## Mean :2.1096
## 3rd Qu.:2.3657
## Max. :4.1620
Let’s check out the rows for which we don’t have soil data and drop them as they won’t contribute to the full picture.
soilVars <- c("m3.Ca", "m3.Mg", "pH", "Total.N", "Total.C")
for(i in 1:length(soilVars)){
print(
ggplot(data=d, aes(x=as.factor(client), y=d[,soilVars[i]])) +
geom_boxplot() +
labs(x="Tubura Farmer", y=soilVars[i], title = paste("RW baseline soil - ", soilVars[i], sep = ""))
)
}
There are biologically predictable relationships between soil chemical characteristics. For instance, we expect Ca and Mg to move in the same direction and be positively correlated with pH. If we had Aluminum as an outcome, we’d expect pH to be negatively correlated with soluable aluminum. Let’s look quickly to confirm if those relationships are present:
ggplot(d, aes(x=m3.Ca, y=m3.Mg)) + geom_point() +
labs(x = "Calcium", y= "Magnesium", title="Calcium/Magnesium relationship")
ggplot(d, aes(x=pH, y=m3.Ca)) + geom_point() +
labs(x = "pH", y="Calcium", title = "pH and Calcium relationship")
ggplot(d, aes(x=pH, y=m3.Mg)) + geom_point() +
labs(x = "pH", y="Calcium", title = "pH and Magnesium relationship")
ggplot(d, aes(x=Total.C, y=Total.N)) + geom_point() +
labs(x = "Total Carbon", y="Total Nitrogen", title = "Carbon and Nitrogen relationship")
The soil characteristics are moving in the manner that is consistent with our understanding of soil chemical processes.
Save clean demographic and soil data to external file
write.csv(d, file=paste(paste(wd, "data", sep = "/"), "shs rw baseline.Rdata", sep = "/"))
save(d, file=paste(paste(wd, "data", sep = "/"), "shs rw baseline.Rdata", sep = "/"))
Produce a simple map of where our observations are
e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e)
map <- leaflet() %>% addTiles() %>%
setView(lng=rwanda$longitude, lat=rwanda$latitude, zoom=8) %>%
addCircleMarkers(lng=ss$lon, lat=ss$lat, clusterOptions = markerClusterOptions())
map
count <- d %>% group_by(district) %>%
dplyr::summarize(
t.count = sum(ifelse(client==1,1,0)),
c.count = sum(ifelse(client==0,1,0)),
total = n()
) %>% ungroup()
count <- as.data.frame(count)
write.csv(count, file=paste(od, "final rw sample breakdown.csv", sep="/"), row.names=F)
as.data.frame(count)
## district t.count c.count total
## 1 Gatsibo_LWH 58 59 117
## 2 Gatsibo_NLWH 87 81 168
## 3 Gisagara 46 46 92
## 4 Huye 114 115 229
## 5 Karongi 163 159 322
## 6 Kayonza 55 58 113
## 7 Mugonero 131 129 260
## 8 Nyamagabe 56 56 112
## 9 Nyamasheke 147 146 293
## 10 Nyanza 46 46 92
## 11 Nyaruguru 46 46 92
## 12 Rutsiro 166 166 332
## 13 Rwamaga 110 111 221
Let’s see how balanced our farmers are in terms of demographic variables. Tubura farmers were selected based on (list criteria) and control farmers in the same area tha fit the same criteria were also selected. No matching process has been performed to identify the control farmers that most closely resemble the Tubura farmers in the sample.
out.list <- c("female", "age", "hhsize", "own", "field.size",
"n_season_fert", "n_season_compost", "n_season_lime", "n_season_fallow", "n_seasons_leg_1", "n_seasons_leg_2", "m3.Ca", "m3.Mg",
"pH", "Total.C", "Total.N")
output <- do.call(rbind, lapply(out.list, function(x) {
out <- t.test(d[,x] ~ d[,"client"], data=d)
tab <- data.frame(out[[5]][[1]], out[[5]][[2]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
return(tab)
}))
# use p.adjust with bonferroni correction
output$pvalue <- p.adjust(output$pvalue, method="fdr")
rownames(output) <- out.list
output <- output[order(output$pvalue),]
output$pvalue <- ifelse(output[, 3] < 0.001, "< 0.001", round(output[, 3], 3))
colnames(output) <- c("Non-Tubura", "Tubura Client", "p-value")
print(kable(output))
| Non-Tubura | Tubura Client | p-value | |
|---|---|---|---|
| n_season_fert | 0.833 | 3.260 | < 0.001 |
| female | 0.649 | 0.493 | < 0.001 |
| age | 48.111 | 44.002 | < 0.001 |
| hhsize | 4.896 | 5.416 | < 0.001 |
| n_season_lime | 0.132 | 0.228 | < 0.001 |
| own | 0.956 | 0.932 | 0.024 |
| n_season_compost | 5.523 | 5.818 | 0.126 |
| n_season_fallow | 0.570 | 0.690 | 0.136 |
| Total.N | 0.158 | 0.156 | 0.138 |
| m3.Ca | 894.242 | 858.574 | 0.157 |
| m3.Mg | 211.917 | 203.756 | 0.157 |
| field.size | 602.096 | 667.258 | 0.214 |
| pH | 5.556 | 5.532 | 0.348 |
| Total.C | 2.120 | 2.099 | 0.348 |
| n_seasons_leg_1 | 2.130 | 2.233 | 0.374 |
| n_seasons_leg_2 | 3.352 | 5.398 | 0.453 |
#write table
write.csv(output, file=paste(od, "baseline balance.csv", sep="/"), row.names=T)
Demographic variables We are not well balanced along the main demographic variables we collected, sex, age and HH size. For the purposes of inference we can test some matching algorithms to improve the match between Tubura and control farmers.
Agricultural practice variables We are decently balanced along agricultural practice variables. Our course of action here is similiar to our options with the demographic variables.
Soil Variables We are balanced on the primary soil variables of interest betwen our Tubura farmers and the comparison farmers.
dist.output <- do.call(rbind, lapply(split(d, d$district), function(x) {
tab <- do.call(rbind, lapply(out.list, function(y) {
out <- t.test(x[,y] ~ x[,"client"], data=x)
tab <- data.frame(out[[5]][[1]], out[[5]][[2]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
#tab[,3] <- p.adjust(tab[,3], method="holm")
#tab[,3] <- ifelse(tab[,3] < 0.001, "< 0.001", round(tab[,3],3))
#print(tab)
return(tab)
}))
return(data.frame(district = unique(x$district), tab))
}))
rownames(dist.output) <- NULL
dist.output$variable <- rep(out.list,13)
# order variables
dist.output <- dist.output[, c(1, 5, 2:4)]
dist.output$pvalue <- p.adjust(dist.output$pvalue, method="fdr")
dist.output <- dist.output[order(dist.output$pvalue),]
dist.output$pvalue <- ifelse(dist.output$pvalue < 0.001, "< 0.001", round(dist.output$pvalue,3))
colnames(dist.output) <- c("District", "Varible", "Non-Tubura", "Tubura Client", "p-value")
print(kable(dist.output))
| District | Varible | Non-Tubura | Tubura Client | p-value | |
|---|---|---|---|---|---|
| 70 | Karongi | n_season_fert | 0.447 | 3.785 | < 0.001 |
| 102 | Mugonero | n_season_fert | 0.597 | 4.313 | < 0.001 |
| 182 | Rutsiro | n_season_fert | 1.072 | 4.175 | < 0.001 |
| 134 | Nyamasheke | n_season_fert | 1.507 | 5.068 | < 0.001 |
| 198 | Rwamaga | n_season_fert | 1.054 | 2.645 | < 0.001 |
| 54 | Huye | n_season_fert | 0.322 | 2.246 | < 0.001 |
| 22 | Gatsibo_NLWH | n_season_fert | 0.296 | 1.517 | < 0.001 |
| 118 | Nyamagabe | n_season_fert | 1.554 | 4.000 | < 0.001 |
| 136 | Nyamasheke | n_season_lime | 0.041 | 0.327 | < 0.001 |
| 86 | Kayonza | n_season_fert | 0.776 | 2.073 | < 0.001 |
| 194 | Rwamaga | age | 52.072 | 44.245 | 0.001 |
| 131 | Nyamasheke | hhsize | 4.685 | 5.701 | 0.003 |
| 65 | Karongi | female | 0.667 | 0.466 | 0.004 |
| 163 | Nyaruguru | hhsize | 4.717 | 6.217 | 0.004 |
| 45 | Gisagara | m3.Mg | 241.171 | 190.036 | 0.006 |
| 50 | Huye | age | 49.400 | 43.395 | 0.009 |
| 44 | Gisagara | m3.Ca | 1203.823 | 895.928 | 0.01 |
| 145 | Nyanza | female | 0.543 | 0.217 | 0.012 |
| 150 | Nyanza | n_season_fert | 0.152 | 1.326 | 0.019 |
| 18 | Gatsibo_NLWH | age | 46.210 | 39.897 | 0.019 |
| 17 | Gatsibo_NLWH | female | 0.556 | 0.322 | 0.021 |
| 2 | Gatsibo_LWH | age | 49.949 | 41.931 | 0.027 |
| 113 | Nyamagabe | female | 0.750 | 0.482 | 0.03 |
| 81 | Kayonza | female | 0.672 | 0.400 | 0.03 |
| 20 | Gatsibo_NLWH | own | 1.000 | 0.908 | 0.031 |
| 104 | Mugonero | n_season_lime | 0.023 | 0.176 | 0.031 |
| 193 | Rwamaga | female | 0.739 | 0.555 | 0.031 |
| 1 | Gatsibo_LWH | female | 0.492 | 0.241 | 0.035 |
| 179 | Rutsiro | hhsize | 4.855 | 5.608 | 0.046 |
| 99 | Mugonero | hhsize | 5.054 | 5.802 | 0.049 |
| 98 | Mugonero | age | 49.140 | 44.290 | 0.067 |
| 161 | Nyaruguru | female | 0.652 | 0.391 | 0.077 |
| 48 | Gisagara | Total.N | 0.154 | 0.142 | 0.079 |
| 47 | Gisagara | Total.C | 2.005 | 1.816 | 0.086 |
| 184 | Rutsiro | n_season_lime | 0.096 | 0.325 | 0.086 |
| 135 | Nyamasheke | n_season_compost | 5.521 | 6.544 | 0.089 |
| 59 | Huye | n_seasons_leg_2 | 5.278 | 3.667 | 0.096 |
| 138 | Nyamasheke | n_seasons_leg_1 | 1.658 | 2.327 | 0.101 |
| 166 | Nyaruguru | n_season_fert | 1.065 | 2.130 | 0.104 |
| 35 | Gisagara | hhsize | 4.326 | 5.348 | 0.113 |
| 6 | Gatsibo_LWH | n_season_fert | 1.475 | 2.552 | 0.121 |
| 51 | Huye | hhsize | 4.574 | 5.132 | 0.141 |
| 115 | Nyamagabe | hhsize | 4.696 | 5.482 | 0.141 |
| 41 | Gisagara | n_season_fallow | 0.087 | 0.500 | 0.184 |
| 43 | Gisagara | n_seasons_leg_2 | 3.283 | 2.370 | 0.184 |
| 148 | Nyanza | own | 1.000 | 0.913 | 0.197 |
| 196 | Rwamaga | own | 0.973 | 0.909 | 0.197 |
| 46 | Gisagara | pH | 5.945 | 5.759 | 0.213 |
| 27 | Gatsibo_NLWH | n_seasons_leg_2 | 3.889 | 3.046 | 0.22 |
| 38 | Gisagara | n_season_fert | 0.348 | 1.087 | 0.22 |
| 72 | Karongi | n_season_lime | 0.013 | 0.098 | 0.225 |
| 155 | Nyanza | n_seasons_leg_2 | 3.478 | 2.130 | 0.225 |
| 88 | Kayonza | n_season_lime | 0.707 | 0.527 | 0.226 |
| 201 | Rwamaga | n_season_fallow | 0.712 | 1.218 | 0.226 |
| 114 | Nyamagabe | age | 48.875 | 43.768 | 0.229 |
| 176 | Nyaruguru | Total.N | 0.170 | 0.161 | 0.238 |
| 205 | Rwamaga | m3.Mg | 293.168 | 273.526 | 0.298 |
| 146 | Nyanza | age | 50.152 | 44.717 | 0.335 |
| 33 | Gisagara | female | 0.609 | 0.435 | 0.339 |
| 66 | Karongi | age | 47.384 | 44.681 | 0.339 |
| 95 | Kayonza | Total.C | 2.556 | 2.400 | 0.343 |
| 82 | Kayonza | age | 46.500 | 42.345 | 0.351 |
| 64 | Huye | Total.N | 0.148 | 0.143 | 0.353 |
| 23 | Gatsibo_NLWH | n_season_compost | 3.420 | 2.632 | 0.366 |
| 92 | Kayonza | m3.Ca | 1863.601 | 1664.716 | 0.366 |
| 177 | Rutsiro | female | 0.627 | 0.542 | 0.375 |
| 183 | Rutsiro | n_season_compost | 7.602 | 8.163 | 0.396 |
| 190 | Rutsiro | pH | 5.342 | 5.261 | 0.396 |
| 11 | Gatsibo_LWH | n_seasons_leg_2 | 2.898 | 1.983 | 0.399 |
| 49 | Huye | female | 0.591 | 0.500 | 0.446 |
| 84 | Kayonza | own | 0.897 | 0.964 | 0.446 |
| 85 | Kayonza | field.size | 423.776 | 664.236 | 0.446 |
| 96 | Kayonza | Total.N | 0.187 | 0.180 | 0.446 |
| 108 | Mugonero | m3.Ca | 604.767 | 555.744 | 0.446 |
| 122 | Nyamagabe | n_seasons_leg_1 | 1.125 | 1.625 | 0.446 |
| 168 | Nyaruguru | n_season_lime | 0.000 | 0.043 | 0.446 |
| 185 | Rutsiro | n_season_fallow | 0.289 | 0.470 | 0.446 |
| 207 | Rwamaga | Total.C | 2.232 | 2.169 | 0.446 |
| 24 | Gatsibo_NLWH | n_season_lime | 0.025 | 0.069 | 0.455 |
| 158 | Nyanza | pH | 5.999 | 6.105 | 0.46 |
| 164 | Nyaruguru | own | 0.935 | 0.848 | 0.46 |
| 188 | Rutsiro | m3.Ca | 685.223 | 629.522 | 0.46 |
| 203 | Rwamaga | n_seasons_leg_2 | 1.685 | 0.536 | 0.46 |
| 174 | Nyaruguru | pH | 5.210 | 5.319 | 0.466 |
| 71 | Karongi | n_season_compost | 6.553 | 7.074 | 0.472 |
| 130 | Nyamasheke | age | 48.123 | 45.898 | 0.499 |
| 156 | Nyanza | m3.Ca | 987.213 | 1083.103 | 0.499 |
| 68 | Karongi | own | 0.981 | 0.957 | 0.499 |
| 58 | Huye | n_seasons_leg_1 | 1.043 | 1.386 | 0.522 |
| 173 | Nyaruguru | m3.Mg | 138.204 | 151.996 | 0.522 |
| 110 | Mugonero | pH | 5.242 | 5.181 | 0.531 |
| 157 | Nyanza | m3.Mg | 210.773 | 230.037 | 0.531 |
| 67 | Karongi | hhsize | 4.912 | 5.172 | 0.536 |
| 175 | Nyaruguru | Total.C | 2.229 | 2.140 | 0.548 |
| 142 | Nyamasheke | pH | 5.123 | 5.179 | 0.568 |
| 3 | Gatsibo_LWH | hhsize | 5.763 | 5.362 | 0.616 |
| 42 | Gisagara | n_seasons_leg_1 | 2.870 | 2.304 | 0.616 |
| 56 | Huye | n_season_lime | 0.113 | 0.035 | 0.616 |
| 204 | Rwamaga | m3.Ca | 1376.064 | 1298.489 | 0.616 |
| 189 | Rutsiro | m3.Mg | 174.684 | 163.413 | 0.616 |
| 165 | Nyaruguru | field.size | 495.239 | 570.022 | 0.621 |
| 4 | Gatsibo_LWH | own | 1.000 | 0.983 | 0.622 |
| 52 | Huye | own | 0.991 | 0.974 | 0.622 |
| 93 | Kayonza | m3.Mg | 357.168 | 336.861 | 0.622 |
| 100 | Mugonero | own | 0.915 | 0.947 | 0.622 |
| 128 | Nyamagabe | Total.N | 0.165 | 0.169 | 0.622 |
| 191 | Rutsiro | Total.C | 2.298 | 2.245 | 0.622 |
| 97 | Mugonero | female | 0.713 | 0.656 | 0.627 |
| 181 | Rutsiro | field.size | 432.241 | 581.313 | 0.63 |
| 79 | Karongi | Total.C | 1.832 | 1.877 | 0.636 |
| 8 | Gatsibo_LWH | n_season_lime | 0.322 | 0.500 | 0.645 |
| 37 | Gisagara | field.size | 559.261 | 493.957 | 0.645 |
| 94 | Kayonza | pH | 6.203 | 6.129 | 0.645 |
| 139 | Nyamasheke | n_seasons_leg_2 | 3.575 | 24.796 | 0.645 |
| 5 | Gatsibo_LWH | field.size | 916.441 | 776.345 | 0.659 |
| 107 | Mugonero | n_seasons_leg_2 | 4.031 | 3.511 | 0.659 |
| 127 | Nyamagabe | Total.C | 2.266 | 2.336 | 0.659 |
| 187 | Rutsiro | n_seasons_leg_2 | 2.584 | 2.048 | 0.659 |
| 206 | Rwamaga | pH | 5.873 | 5.817 | 0.659 |
| 101 | Mugonero | field.size | 393.124 | 441.359 | 0.66 |
| 171 | Nyaruguru | n_seasons_leg_2 | 3.370 | 5.326 | 0.661 |
| 208 | Rwamaga | Total.N | 0.178 | 0.175 | 0.661 |
| 25 | Gatsibo_NLWH | n_season_fallow | 0.543 | 0.736 | 0.667 |
| 129 | Nyamasheke | female | 0.692 | 0.646 | 0.683 |
| 57 | Huye | n_season_fallow | 0.487 | 0.675 | 0.684 |
| 133 | Nyamasheke | field.size | 580.130 | 787.527 | 0.684 |
| 147 | Nyanza | hhsize | 4.891 | 5.261 | 0.684 |
| 192 | Rutsiro | Total.N | 0.159 | 0.157 | 0.684 |
| 172 | Nyaruguru | m3.Ca | 615.230 | 665.044 | 0.687 |
| 144 | Nyamasheke | Total.N | 0.167 | 0.165 | 0.69 |
| 117 | Nyamagabe | field.size | 282.643 | 310.929 | 0.7 |
| 34 | Gisagara | age | 46.848 | 44.761 | 0.703 |
| 10 | Gatsibo_LWH | n_seasons_leg_1 | 4.678 | 4.345 | 0.715 |
| 69 | Karongi | field.size | 446.088 | 490.788 | 0.715 |
| 109 | Mugonero | m3.Mg | 169.468 | 159.707 | 0.715 |
| 178 | Rutsiro | age | 44.765 | 43.542 | 0.715 |
| 77 | Karongi | m3.Mg | 269.452 | 254.229 | 0.715 |
| 21 | Gatsibo_NLWH | field.size | 1459.901 | 1306.069 | 0.739 |
| 55 | Huye | n_season_compost | 4.878 | 5.211 | 0.739 |
| 12 | Gatsibo_LWH | m3.Ca | 1139.996 | 1206.971 | 0.754 |
| 39 | Gisagara | n_season_compost | 3.957 | 4.326 | 0.754 |
| 78 | Karongi | pH | 5.477 | 5.441 | 0.754 |
| 80 | Karongi | Total.N | 0.142 | 0.144 | 0.754 |
| 90 | Kayonza | n_seasons_leg_1 | 2.155 | 1.982 | 0.754 |
| 153 | Nyanza | n_season_fallow | 0.870 | 1.130 | 0.754 |
| 180 | Rutsiro | own | 0.934 | 0.916 | 0.754 |
| 199 | Rwamaga | n_season_compost | 3.748 | 4.018 | 0.754 |
| 106 | Mugonero | n_seasons_leg_1 | 1.186 | 1.336 | 0.764 |
| 19 | Gatsibo_NLWH | hhsize | 5.160 | 5.333 | 0.766 |
| 91 | Kayonza | n_seasons_leg_2 | 1.259 | 1.091 | 0.766 |
| 116 | Nyamagabe | own | 0.982 | 0.964 | 0.766 |
| 169 | Nyaruguru | n_season_fallow | 0.261 | 0.370 | 0.766 |
| 36 | Gisagara | own | 0.870 | 0.826 | 0.767 |
| 9 | Gatsibo_LWH | n_season_fallow | 0.288 | 0.397 | 0.769 |
| 119 | Nyamagabe | n_season_compost | 7.518 | 7.857 | 0.769 |
| 120 | Nyamagabe | n_season_lime | 0.393 | 0.500 | 0.78 |
| 61 | Huye | m3.Mg | 190.397 | 186.200 | 0.789 |
| 141 | Nyamasheke | m3.Mg | 147.629 | 153.726 | 0.789 |
| 53 | Huye | field.size | 567.922 | 599.096 | 0.8 |
| 123 | Nyamagabe | n_seasons_leg_2 | 2.929 | 3.161 | 0.8 |
| 29 | Gatsibo_NLWH | m3.Mg | 254.332 | 246.971 | 0.814 |
| 132 | Nyamasheke | own | 0.945 | 0.932 | 0.814 |
| 140 | Nyamasheke | m3.Ca | 590.765 | 613.771 | 0.814 |
| 167 | Nyaruguru | n_season_compost | 6.043 | 5.696 | 0.814 |
| 149 | Nyanza | field.size | 766.217 | 853.359 | 0.818 |
| 170 | Nyaruguru | n_seasons_leg_1 | 2.348 | 2.043 | 0.819 |
| 154 | Nyanza | n_seasons_leg_1 | 2.283 | 2.543 | 0.828 |
| 197 | Rwamaga | field.size | 842.054 | 892.855 | 0.831 |
| 202 | Rwamaga | n_seasons_leg_1 | 1.568 | 1.682 | 0.831 |
| 126 | Nyamagabe | pH | 5.022 | 5.002 | 0.836 |
| 200 | Rwamaga | n_season_lime | 0.324 | 0.355 | 0.836 |
| 60 | Huye | m3.Ca | 870.054 | 853.743 | 0.847 |
| 62 | Huye | pH | 5.665 | 5.684 | 0.848 |
| 30 | Gatsibo_NLWH | pH | 6.061 | 6.038 | 0.852 |
| 103 | Mugonero | n_season_compost | 6.070 | 6.229 | 0.872 |
| 137 | Nyamasheke | n_season_fallow | 0.664 | 0.599 | 0.872 |
| 76 | Karongi | m3.Ca | 803.893 | 787.032 | 0.89 |
| 16 | Gatsibo_LWH | Total.N | 0.146 | 0.148 | 0.916 |
| 13 | Gatsibo_LWH | m3.Mg | 235.470 | 239.641 | 0.929 |
| 14 | Gatsibo_LWH | pH | 6.119 | 6.138 | 0.929 |
| 15 | Gatsibo_LWH | Total.C | 1.866 | 1.887 | 0.929 |
| 63 | Huye | Total.C | 1.919 | 1.910 | 0.929 |
| 105 | Mugonero | n_season_fallow | 0.915 | 0.863 | 0.929 |
| 121 | Nyamagabe | n_season_fallow | 0.304 | 0.268 | 0.929 |
| 124 | Nyamagabe | m3.Ca | 449.585 | 456.505 | 0.929 |
| 125 | Nyamagabe | m3.Mg | 108.307 | 106.662 | 0.929 |
| 143 | Nyamasheke | Total.C | 2.285 | 2.298 | 0.929 |
| 159 | Nyanza | Total.C | 1.778 | 1.761 | 0.929 |
| 28 | Gatsibo_NLWH | m3.Ca | 1246.188 | 1230.161 | 0.931 |
| 75 | Karongi | n_seasons_leg_2 | 3.956 | 3.816 | 0.931 |
| 195 | Rwamaga | hhsize | 4.982 | 4.927 | 0.931 |
| 83 | Kayonza | hhsize | 5.155 | 5.091 | 0.932 |
| 7 | Gatsibo_LWH | n_season_compost | 5.780 | 5.672 | 0.943 |
| 112 | Mugonero | Total.N | 0.153 | 0.154 | 0.943 |
| 89 | Kayonza | n_season_fallow | 0.483 | 0.455 | 0.952 |
| 160 | Nyanza | Total.N | 0.134 | 0.134 | 0.962 |
| 31 | Gatsibo_NLWH | Total.C | 1.994 | 2.002 | 0.964 |
| 186 | Rutsiro | n_seasons_leg_1 | 3.452 | 3.416 | 0.964 |
| 151 | Nyanza | n_season_compost | 3.326 | 3.261 | 0.966 |
| 32 | Gatsibo_NLWH | Total.N | 0.158 | 0.157 | 0.977 |
| 26 | Gatsibo_NLWH | n_seasons_leg_1 | 1.556 | 1.540 | 0.978 |
| 73 | Karongi | n_season_fallow | 0.843 | 0.834 | 0.978 |
| 74 | Karongi | n_seasons_leg_1 | 2.497 | 2.485 | 0.978 |
| 87 | Kayonza | n_season_compost | 3.534 | 3.564 | 0.978 |
| 111 | Mugonero | Total.C | 2.203 | 2.206 | 0.978 |
| 162 | Nyaruguru | age | 48.304 | 48.457 | 0.978 |
| 40 | Gisagara | n_season_lime | 0.022 | 0.022 | 1 |
| 152 | Nyanza | n_season_lime | 0.000 | 0.000 | NA |
Demographic variables interpretation here.
Agricultural practice variables interpretation here
Soil Variables interpretation here
write.csv(dist.output, file=paste(od, "district balance.csv", sep="/"), row.names=T)
Look at farmers by duration of tenure farming with Tubura. We want to understand, at least with an initial naive baseline sense, what is the cumulative effect of Tubura practices on soil health outcomes?
We will look only at current Tubura farmers and compare first year farmers to farmers with more experience with Tubura.
oafOnly <- d[which(d$client==0 & d$total.seasons>=1),]
for(i in 1:length(soilVars)){
print(
ggplot(oafOnly, aes(x=as.factor(total.seasons), y=oafOnly[,soilVars[i]])) +
geom_boxplot() +
labs(x="Tubura Tenure", y=soilVars[i], title = paste("RW baseline soil by tenure - ", soilVars[i], sep = ""))
)
}
tenureSum <- aggregate(oafOnly[,out.list], by=list(oafOnly$total.seasons), function(x){
round(mean(x, na.rm=T),2)
})
tenureSum <- as.data.frame(t(tenureSum))
colnames(tenureSum) <- c(paste(seq(1,11,1), " seas.", sep = ""), "13 seas.")
print(kable(tenureSum))
| 1 seas. | 2 seas. | 3 seas. | 4 seas. | 5 seas. | 6 seas. | 7 seas. | 8 seas. | 9 seas. | 10 seas. | 11 seas. | 13 seas. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Group.1 | 1.00 | 2.00 | 3.00 | 4.00 | 5.00 | 6.00 | 7.00 | 8.00 | 9.00 | 10.00 | 11.00 | 13.00 |
| female | 0.70 | 0.61 | 0.67 | 0.65 | 0.64 | 0.58 | 0.75 | 0.33 | 0.33 | 0.75 | 0.67 | 0.00 |
| age | 48.77 | 46.43 | 46.46 | 48.96 | 42.73 | 43.83 | 42.12 | 52.33 | 36.00 | 48.50 | 42.00 | 52.00 |
| hhsize | 5.30 | 5.40 | 5.71 | 4.91 | 6.09 | 5.25 | 5.50 | 5.83 | 6.67 | 6.50 | 8.67 | 6.00 |
| own | 0.92 | 0.97 | 1.00 | 1.00 | 0.91 | 1.00 | 1.00 | 0.83 | 1.00 | 1.00 | 1.00 | 1.00 |
| field.size | 564.72 | 595.76 | 390.17 | 547.13 | 976.18 | 289.17 | 346.75 | 602.33 | 256.00 | 241.00 | 334.67 | 480.00 |
| n_season_fert | 1.03 | 1.27 | 3.17 | 1.65 | 1.82 | 2.83 | 2.38 | 5.33 | 2.67 | 3.75 | 0.33 | 10.00 |
| n_season_compost | 5.48 | 5.61 | 6.58 | 4.43 | 5.82 | 6.50 | 6.75 | 8.33 | 10.00 | 8.25 | 4.67 | 10.00 |
| n_season_lime | 0.27 | 0.17 | 0.25 | 0.09 | 0.18 | 0.33 | 0.00 | 0.17 | 0.00 | 0.00 | 0.00 | 0.00 |
| n_season_fallow | 0.53 | 0.74 | 0.58 | 1.00 | 0.45 | 0.50 | 0.88 | 0.00 | 0.00 | 0.00 | 1.33 | 0.00 |
| n_seasons_leg_1 | 2.12 | 2.14 | 1.96 | 2.00 | 1.55 | 1.25 | 0.62 | 3.00 | 4.00 | 2.25 | 4.33 | 1.00 |
| n_seasons_leg_2 | 3.13 | 2.85 | 4.50 | 2.57 | 6.27 | 2.33 | 5.88 | 3.50 | 3.33 | 5.25 | 0.00 | 9.00 |
| m3.Ca | 1127.45 | 946.96 | 1034.46 | 686.67 | 678.56 | 625.67 | 870.40 | 443.64 | 1153.25 | 483.06 | 746.88 | 632.20 |
| m3.Mg | 247.35 | 226.65 | 252.39 | 165.35 | 153.59 | 138.80 | 217.69 | 120.77 | 307.42 | 153.92 | 177.55 | 224.80 |
| pH | 5.72 | 5.56 | 5.70 | 5.20 | 5.34 | 5.34 | 5.64 | 5.10 | 5.87 | 5.18 | 5.50 | 5.54 |
| Total.C | 2.13 | 2.19 | 2.19 | 2.36 | 2.01 | 2.06 | 1.83 | 2.36 | 1.85 | 2.26 | 1.77 | 2.06 |
| Total.N | 0.16 | 0.16 | 0.16 | 0.17 | 0.15 | 0.15 | 0.14 | 0.16 | 0.15 | 0.15 | 0.13 | 0.16 |
oafOnly$tenured <- ifelse(oafOnly$total.seasons==1,0,1)
tenure <- do.call(rbind, lapply(out.list, function(x) {
out <- t.test(oafOnly[,x] ~ oafOnly[,"tenured"], data=oafOnly)
tab <- data.frame(out[[5]][[1]], out[[5]][[2]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
return(tab)
}))
# use p.adjust with bonferroni correction
tenure$pvalue <- p.adjust(tenure$pvalue, method="fdr")
rownames(tenure) <- out.list
tenure <- tenure[order(tenure$pvalue),]
tenure$pvalue <- ifelse(tenure[, 3] < 0.001, "< 0.001", round(tenure[, 3], 3))
colnames(tenure) <- c("Non-Tubura", "Tubura Client", "p-value")
print(kable(tenure))
| Non-Tubura | Tubura Client | p-value | |
|---|---|---|---|
| n_season_fert | 1.032 | 1.944 | < 0.001 |
| m3.Ca | 1127.451 | 863.314 | 0.003 |
| pH | 5.724 | 5.492 | 0.003 |
| m3.Mg | 247.352 | 208.608 | 0.033 |
| n_season_lime | 0.272 | 0.162 | 0.105 |
| own | 0.920 | 0.975 | 0.115 |
| female | 0.704 | 0.614 | 0.219 |
| age | 48.768 | 46.213 | 0.235 |
| n_season_compost | 5.480 | 5.914 | 0.56 |
| hhsize | 5.304 | 5.523 | 0.574 |
| n_season_fallow | 0.528 | 0.680 | 0.574 |
| Total.C | 2.127 | 2.170 | 0.602 |
| n_seasons_leg_2 | 3.128 | 3.365 | 0.746 |
| field.size | 564.724 | 540.751 | 0.833 |
| n_seasons_leg_1 | 2.120 | 2.036 | 0.833 |
| Total.N | 0.160 | 0.160 | 0.835 |
Tubura tenure is being defined here as having more than 1 year experience farming with Tubura.
Demographic variables We are well balanced along demographic variables.
Agricultural practice variables Not surprisingly, Tubura farmers have more cumulative years of fertilizer use than current non-Tubura farmers. While that difference is signficant, it is realistically only a single season of fertilizer use different.
Interestingly, non-Tubura farmers reported using more lime than current Tubura farmers. This
Soil Variables Soil pH, calcium and magnesium levels are lower for tenured Tubura farmers. This is consistent with the hypothesis that increaesd fertilizer use leads to an increaese in soil acidity.
Here’s where we’ll look at the contribution of fertilizer, lime and cultivation practices on soil health outcomes. This analysis will be come richer as we gain longitudinal measures. I caution that we cannot treat these relationships as causal. The direction of causality is not clearly delineated in the data or the study design. However, we can identify meaningful connections between practices and outcomes through this analysis to generate new hypotheses for field testing.
I’m going to start with behaviors by sections and then move to a more comprehensive model including multiple practices. All models will include controls for site to account for local variation and field officer behavior.
suppressMessages(library(stargazer))
list1 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", "n_season_fert + n_season_compost + n_season_lime + n_season_fallow + as.factor(district)", sep="")), data=d)
return(mod)
})
stargazer(list1, type="html",
title = "2016A Rwanda Soil Health Baseline - Naive Agronomic Practice Models",
covariate.labels = c("Seasons of Fertilizer", "Seasons of Compost",
"Seasons of Lime", "Seasons of Fallow"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for district",
omit=c("district"), out=paste(od, "rw_baseline_agprac.htm", sep="/"))
| Ca | Mg | pH | Total.N | Total.C | |
| (1) | (2) | (3) | (4) | (5) | |
| Seasons of Fertilizer | -2.639 | 0.192 | 0.006* | -0.001*** | -0.013*** |
| (3.471) | (0.855) | (0.003) | (0.0002) | (0.004) | |
| Seasons of Compost | 3.629 | 0.048 | 0.004 | 0.0003* | 0.003 |
| (2.720) | (0.670) | (0.003) | (0.0002) | (0.003) | |
| Seasons of Lime | 23.072 | 3.831 | 0.010 | 0.002** | 0.030* |
| (15.158) | (3.734) | (0.015) | (0.001) | (0.016) | |
| Seasons of Fallow | -31.367*** | -8.037*** | -0.042*** | 0.001** | 0.014** |
| (5.584) | (1.376) | (0.005) | (0.0003) | (0.006) | |
| Constant | 1,158.974*** | 238.052*** | 6.103*** | 0.146*** | 1.867*** |
| (43.596) | (10.740) | (0.043) | (0.003) | (0.046) | |
| Observations | 2,443 | 2,443 | 2,443 | 2,443 | 2,443 |
| R2 | 0.367 | 0.237 | 0.425 | 0.192 | 0.159 |
| Adjusted R2 | 0.363 | 0.232 | 0.421 | 0.186 | 0.153 |
| Residual Std. Error (df = 2426) | 437.575 | 107.798 | 0.430 | 0.026 | 0.466 |
| F Statistic (df = 16; 2426) | 87.957*** | 47.177*** | 112.103*** | 35.920*** | 28.612*** |
| Note: | p<0.1; p<0.05; p<0.01 | ||||
| Includes FE for district | |||||
Interpretation The naive model suggests that when we include site level fixed effects, duration of agronomic practices don’t have a big effect on soil health outcomes. However, some of the practice intensity variables are not well distributed. Let’s take a look at a log transformation. I’m adding one to the variables as to not end up with lots of Inf values.
agPrac <- c(names(d[grep('n_season_', names(d))]))
for(i in 1:length(agPrac)){
print(
ggplot(d, aes(x=d[,agPrac[i]])) + geom_histogram(binwidth = 1) +
labs(x = agPrac[i])
)
}
# since these are all skewed, consider a log transform
for(i in 1:length(agPrac)){
print(
ggplot(d, aes(x=log(d[,agPrac[i]]+1))) + geom_histogram(binwidth = 1) +
labs(x = agPrac[i])
)
}
d$logFert <- log(d$n_season_fert+1)
d$logCompost <- log(d$n_season_compost+1)
d$logLime <- log(d$n_season_lime+1)
d$logFallow <- log(d$n_season_fallow+1)
Let’s look at the log results:
logVars <- paste(names(d[grep("log", names(d))]), collapse=" + ")
list2 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", logVars, "+ as.factor(district)", sep="")), data=d)
return(mod)
})
stargazer(list2, type="html",
title = "2016A Rwanda Soil Health Baseline - Naive Agronomic Practice Models",
covariate.labels = c("Seasons of Fertilizer (log)", "Seasons of Compost (log)", "Seasons of Lime (log)", "Seasons of Fallow (log)"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for district",
omit=c("district"), out=paste(od, "rw_baseline_agprac_log.htm", sep="/"))
| Ca | Mg | pH | Total.N | Total.C | |
| (1) | (2) | (3) | (4) | (5) | |
| Seasons of Fertilizer (log) | -12.831 | -0.801 | 0.018 | -0.003*** | -0.048*** |
| (12.150) | (2.995) | (0.012) | (0.001) | (0.013) | |
| Seasons of Compost (log) | 17.314 | -0.115 | 0.015 | 0.002** | 0.018 |
| (12.513) | (3.084) | (0.012) | (0.001) | (0.013) | |
| Seasons of Lime (log) | 61.473* | 9.814 | 0.024 | 0.004** | 0.078** |
| (33.355) | (8.222) | (0.033) | (0.002) | (0.036) | |
| Seasons of Fallow (log) | -114.067*** | -28.722*** | -0.151*** | 0.002** | 0.038** |
| (16.284) | (4.014) | (0.016) | (0.001) | (0.017) | |
| Constant | 1,159.526*** | 240.777*** | 6.111*** | 0.146*** | 1.859*** |
| (45.388) | (11.188) | (0.045) | (0.003) | (0.049) | |
| Observations | 2,443 | 2,443 | 2,443 | 2,443 | 2,443 |
| R2 | 0.372 | 0.243 | 0.432 | 0.192 | 0.159 |
| Adjusted R2 | 0.368 | 0.238 | 0.428 | 0.186 | 0.153 |
| Residual Std. Error (df = 2426) | 435.826 | 107.428 | 0.428 | 0.026 | 0.466 |
| F Statistic (df = 16; 2426) | 89.885*** | 48.550*** | 115.129*** | 35.963*** | 28.644*** |
| Note: | p<0.1; p<0.05; p<0.01 | ||||
| Includes FE for district | |||||
Interpretation: When we transform the variables to log, the data starts to tell a more coherent story.
Let’s look first at a naive model of One Acre Fund tenure on soil health. Remember: these data are not longitudinal! These data are not longitudinal and reflect farmer selection into One Acre Fund. While these models will try to be both robust and parsimonious, we will inevitabily suffer omitted variable bias due to a lack of an instrument.
list3 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", "total.seasons + as.factor(district)", sep="")), data=d)
return(mod)
})
stargazer(list3, type="html",
title = "2016A Rwanda Soil Health Baseline - Naive Tenure Models",
covariate.labels = c("OAF Tenure"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for district",
omit=c("district"), out=paste(od, "rw_baseline_tenure.htm", sep="/"))
| Ca | Mg | pH | Total.N | Total.C | |
| (1) | (2) | (3) | (4) | (5) | |
| OAF Tenure | 3.388 | 1.679** | 0.011*** | -0.001*** | -0.010*** |
| (2.945) | (0.725) | (0.003) | (0.0002) | (0.003) | |
| Constant | 1,166.566*** | 234.251*** | 6.108*** | 0.149*** | 1.896*** |
| (41.145) | (10.124) | (0.041) | (0.002) | (0.044) | |
| Observations | 2,443 | 2,443 | 2,443 | 2,443 | 2,443 |
| R2 | 0.357 | 0.227 | 0.411 | 0.189 | 0.156 |
| Adjusted R2 | 0.354 | 0.223 | 0.407 | 0.184 | 0.151 |
| Residual Std. Error (df = 2429) | 440.658 | 108.424 | 0.435 | 0.026 | 0.467 |
| F Statistic (df = 13; 2429) | 103.913*** | 55.019*** | 130.144*** | 43.489*** | 34.414*** |
| Note: | p<0.1; p<0.05; p<0.01 | ||||
| Includes FE for district | |||||
Interpretation: The naive One Acre Fund tenure model suggest that across the board that additional years of 1AF practices have a negative effect on soil health parameters. Let’s combine 1AF tenure with the agronomic practices model above to build a more robust model:
list4 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", logVars, "+ total.seasons + as.factor(district)", sep="")), data=d)
return(mod)
})
stargazer(list4, type="html",
title = "2016A Rwanda Soil Health Baseline - Ag Practice and Tenure",
covariate.labels = c("Seasons of Fertilizer (log)", "Seasons of Compost (log)", "Seasons of Lime (log)", "Seasons of Fallow (log)", "OAF Tenure"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for district",
omit=c("district"), out=paste(od, "rw_baseline_ag_tenure.htm", sep="/"))
| Ca | Mg | pH | Total.N | Total.C | |
| (1) | (2) | (3) | (4) | (5) | |
| Seasons of Fertilizer (log) | -25.873* | -6.471* | -0.006 | -0.002** | -0.033** |
| (14.577) | (3.589) | (0.014) | (0.001) | (0.016) | |
| Seasons of Compost (log) | 17.302 | -0.121 | 0.015 | 0.002** | 0.018 |
| (12.508) | (3.080) | (0.012) | (0.001) | (0.013) | |
| Seasons of Lime (log) | 63.791* | 10.822 | 0.028 | 0.004** | 0.075** |
| (33.375) | (8.217) | (0.033) | (0.002) | (0.036) | |
| Seasons of Fallow (log) | -114.747*** | -29.018*** | -0.152*** | 0.002** | 0.039** |
| (16.284) | (4.009) | (0.016) | (0.001) | (0.017) | |
| OAF Tenure | 5.760 | 2.504*** | 0.011*** | -0.001*** | -0.007* |
| (3.560) | (0.876) | (0.003) | (0.0002) | (0.004) | |
| Constant | 1,158.003*** | 240.115*** | 6.108*** | 0.146*** | 1.861*** |
| (45.382) | (11.174) | (0.044) | (0.003) | (0.049) | |
| Observations | 2,443 | 2,443 | 2,443 | 2,443 | 2,443 |
| R2 | 0.373 | 0.245 | 0.434 | 0.194 | 0.160 |
| Adjusted R2 | 0.368 | 0.240 | 0.430 | 0.188 | 0.154 |
| Residual Std. Error (df = 2425) | 435.680 | 107.270 | 0.427 | 0.026 | 0.466 |
| F Statistic (df = 17; 2425) | 84.808*** | 46.309*** | 109.252*** | 34.365*** | 27.180*** |
| Note: | p<0.1; p<0.05; p<0.01 | ||||
| Includes FE for district | |||||
Interpretation: Including agronomic practices and 1AF tenure in the same model dampens the magnitude, but not the significance, of 1AF tenure on soil health outcomes.
Thus far we have looked at aggregated historical plot level practices and their effect on soil health. We also asked farmers about their cultivation practices on their plot in the previous season, 15B. We have more precise information for fertilizer, compost and liming practices for the 15B season.
# this should be kg of fertilizer used in this field. Compost is off the charts. Convert this to compost per sq meter
d$fert_15b_log <- ifelse(d$field_kg_fert1_15b<=20,
log(d$field_kg_fert1_15b+1), NA)
d$field_dim <- d$field_dim1*d$field_dim2
d$compost_15b_sqm <- d$field_kg_compost_15b/d$field_dim
d$compost_15b_sqm_log <- log(d$compost_15b_sqm+1)
# more than 20 kg per are seems like too much.
suppressMessages(qplot(d$fert_15b_log, binwidth=1))
## Warning: Removed 1808 rows containing non-finite values (stat_bin).
suppressMessages(qplot(d$compost_15b_sqm_log, binwidth=1))
## Warning: Removed 143 rows containing non-finite values (stat_bin).
previousSeason <- paste("fert_15b_log", "compost_15b_sqm_log", "as.factor(district)", sep=" + ")
list5 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", previousSeason, sep="")), data=d)
return(mod)
})
stargazer(list5, type="html",
title = "2016A Rwanda Soil Health Baseline - 15B practices",
covariate.labels = c("Fertilizer Rate (log)", "Compost Rate kg/m^2 (log)"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for district",
omit=c("district"), out=paste(od, "rw_baseline_15b_ag.htm", sep="/"))
| Ca | Mg | pH | Total.N | Total.C | |
| (1) | (2) | (3) | (4) | (5) | |
| Fertilizer Rate (log) | -54.919** | -16.618** | -0.087*** | 0.001 | 0.020 |
| (27.711) | (7.772) | (0.031) | (0.002) | (0.034) | |
| Compost Rate kg/m(log) | 55.915 | 17.782 | -0.016 | 0.001 | 0.032 |
| (41.802) | (11.724) | (0.046) | (0.003) | (0.052) | |
| Constant | 1,362.562*** | 267.001*** | 6.357*** | 0.144*** | 1.789*** |
| (98.218) | (27.546) | (0.109) | (0.007) | (0.122) | |
| Observations | 635 | 635 | 635 | 635 | 635 |
| R2 | 0.447 | 0.255 | 0.380 | 0.196 | 0.146 |
| Adjusted R2 | 0.434 | 0.238 | 0.366 | 0.178 | 0.127 |
| Residual Std. Error (df = 620) | 382.674 | 107.324 | 0.424 | 0.026 | 0.476 |
| F Statistic (df = 14; 620) | 35.734*** | 15.131*** | 27.146*** | 10.782*** | 7.564*** |
| Note: | p<0.1; p<0.05; p<0.01 | ||||
| Includes FE for district | |||||
Let’s look at farmer perceived fertility as a predictor of soil health. We’ll set ‘same fertility’ as the reference category.
d$fertility_qual <- as.factor(d$general_field_infocompare_fertil)
d <- within(d, fertility_qual <- relevel(fertility_qual, ref="same"))
list6 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", "+ fertility_qual + as.factor(district)", sep="")), data=d)
return(mod)
})
stargazer(list6, type="html",
title = "2016A Rwanda Soil Health Baseline - 15B practices",
covariate.labels = c("Farmer Opinion - Less Fertile",
"Farmer Opinion - More Fertile"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Reference category is same fertility as other fields. Includes FE for district",
notes.align = "l",
omit=c("district"), out=paste(od, "rw_baseline_fertility_qual.htm", sep="/"))
| Ca | Mg | pH | Total.N | Total.C | |
| (1) | (2) | (3) | (4) | (5) | |
| Farmer Opinion - Less Fertile | -106.273*** | -28.777*** | -0.144*** | 0.005*** | 0.079*** |
| (22.428) | (5.512) | (0.022) | (0.001) | (0.024) | |
| Farmer Opinion - More Fertile | 62.661** | 17.929*** | 0.066*** | -0.001 | -0.005 |
| (24.803) | (6.096) | (0.024) | (0.001) | (0.026) | |
| Constant | 1,189.851*** | 241.941*** | 6.153*** | 0.146*** | 1.859*** |
| (40.933) | (10.061) | (0.040) | (0.002) | (0.044) | |
| Observations | 2,443 | 2,443 | 2,443 | 2,443 | 2,443 |
| R2 | 0.367 | 0.240 | 0.422 | 0.187 | 0.156 |
| Adjusted R2 | 0.363 | 0.236 | 0.419 | 0.182 | 0.151 |
| Residual Std. Error (df = 2428) | 437.503 | 107.531 | 0.431 | 0.026 | 0.467 |
| F Statistic (df = 14; 2428) | 100.470*** | 54.903*** | 126.749*** | 39.872*** | 32.032*** |
| Note: | p<0.1; p<0.05; p<0.01 | ||||
| Reference category is same fertility as other fields. Includes FE for district | |||||
Interpretation: Farmers understand their fields well. Their categorization of which field are more and less fertile corresponds to our quantified measures of soil health. The only features farmers don’t seem to get correct are nitrogen and carbon. The nitrogen and carbon levels are actually higher in the fields farmers called ‘low fertility’ relative to the fields deemed to be the ‘same fertility.’ Reminder: We need to remember that farmers are only evaluting one of their fields thus we are not able to account for the quality of the farmer in assessing his/her fields.
We don’t have measured yield data for the Rwanda baseline. Skip this for now.
#crdref <- CRS('+proj=longlat +datum=WGS84')
crdref <- CRS('+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')
e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e, proj4string = crdref)
rw <- getData("GADM", country='RW', level=3, path = "/Users/mlowes/drive/soil health study/data") # the higher the number, the higher the resolution
#ext.rw.ss matches points with spatial polygons in rw
ext.rw.ss <- extract(rw[, "NAME_3"], ss)
## Loading required namespace: rgeos
ss$spatialname <- ext.rw.ss$NAME_3
frw <- fortify(rw, region="NAME_3")
ss.soil <- aggregate(ss@data[,soilVars], by=list(ss@data$spatialname), function(x){
mean(x, na.rm=T)
})
plotReady <- dplyr::left_join(frw, ss.soil, by=c("id"="Group.1"))
Consider revising these maps to a smaller greographic unit. Add the name of the location for uninitiated users.
library(RColorBrewer)
mapList <- list()
for(i in 1:length(soilVars)){
mapRes <- ggplot(plotReady, aes(x=long, y=lat, group=group)) + geom_path() +
geom_polygon(aes(fill=plotReady[,soilVars[i]])) +
scale_fill_gradientn(colours = rev(brewer.pal(9,"Reds")), # define colors
name = soilVars[i],
guide = guide_colorbar(legend.direction = "vertical")) +
theme_bw() +
labs(title=paste("Rwanda long term soil health baseline - 2016", soilVars[i], sep= " "), x = "Longitude", y="Latitude")
mapList[[i]] <- mapRes
print(mapRes)
}
pdf(file=paste(md, "rw_shs_baseline_soil_maps.pdf", sep = "/"), width=11, height=8.5)
for(i in 1:length(mapList)){
print(mapList[[i]])
}
dev.off()
## quartz_off_screen
## 2
Interpolate soil health values for full operating area using soil health study values. We want to eventually add all Rwandan soil values into a single dataset to update and hone these values. See here for more guidanace
note:
The code below will run 5 K-fold cross validation to compare interpolation models. The output will be fed into the interpolate leaflet code below.
Check that I’m handling the projection correctly with Robert
#proj4string(ss) <- CRS("+init=epsg:4326")
ss <- spTransform(ss, CRS=("+proj=utm +zone=36N +datum=WGS84"))
suppressMessages(library(fields))
library(gstat)
library(htmltools)
# root mean sq error for evaluating models
RMSE <- function(observed, predicted) {
sqrt(mean((predicted - observed)^2, na.rm=TRUE))
}
# set k folds to 5
set.seed(20161030)
nfolds <- 5
k <- kfold(ss, nfolds) # from dismo
# cross validation of models
ensrmse <- tpsrmse <- idwrmse <- rep(NA, 5) # assing multiple objects at once
cv <- function(x) {
for(i in 1:nfolds) {
train <- ss[k!=i,]
test <- ss[k==i,]
train <- train[!is.na(train@data[,x]),]
# inverse distance weights
m <- gstat(formula=as.formula(paste(x, '~ 1')), locations=train)
p1 <- predict(m, newdata=test, debug.level=0)$var1.pred
idwrmse[i] <- RMSE(test@data[,x], p1) #idw rsme
# krieging
# m <- autoKrige(formula=as.formula(paste(x, "~ 1")), input_data = train)
# p2 <- predict(m, newdata=test, debug.level=0)$var1.pred
# krigrmse[i] <- RMSE(test$OZDLYAV, p2)
# thin plate spline
m <- Tps(coordinates(train), train@data[,x])
p3 <- predict(m, coordinates(test))
tpsrmse[i] <- RMSE(test@data[,x], p3)
w <- c(idwrmse[i], tpsrmse[i]) # combine the rmse
weights <- w / sum(w) # weight them
ensemble <- p1 * weights[1] + p3 * weights[2]
# multiply predictions by weights
ensrmse[i] <- RMSE(test@data[,x], ensemble) # truly an ensemble result?
}
output <- rbind(idwrmse, tpsrmse, ensrmse)
return(output)
}
Now loop over the variables of interest where x is the soilVar variable.
output <- lapply(soilVars, function(x){
ini <- data.frame(cv(x))
ini$ave <- apply(ini[,1:5], 1, function(y){mean(y, na.rm=T)})
res <- paste("Best model is ", row.names(ini[which.min(ini$ave),]), sep = "")
return(list(ini, res))
})
r <- raster(res=1/12)
r <- crop(r, floor(extent(rw)))
maps <- lapply(soilVars, function(x){
m <- Tps(coordinates(ss), ss@data[,x])
# make raster layer with model, raster is rwanda empty raster, model is m
tps <- interpolate(r, m)
tps <- crop(tps, rw)
tps <- mask(tps, rw) # cuts the tps raster down to the rw boundaries
x <- gsub("m3.", "", x)
pal <- colorNumeric(palette = "Reds", values(tps), na.color = "transparent")
suppressWarnings(
leaflet() %>% addTiles() %>%
addRasterImage(tps, colors=pal, opacity = 0.8) %>%
setView(lng=rwanda$longitude, lat=rwanda$latitude, zoom=8) %>%
addLegend(pal = pal, values = values(tps), title = paste("Soil Value ", x, sep=""))
)
})
tagList(maps)
Print out the interpolated values for inclusion in the report.
## NULL
## NULL
## NULL
## NULL
## NULL
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
## quartz_off_screen
## 2
This section will look at improving understanding of local context to inform local adaptation. It will also construct soil profiles to simplify the scaling of promising products and practices to targeted locations.
–end